Google Sheets

Reading data with the googlesheets package

# Install once per machine rather than on every run; the line is kept
# commented out, like the jsonlite and rvest install lines in this file.
# NOTE(review): googlesheets has been retired in favour of googlesheets4 and
# may no longer be installable from CRAN -- confirm before using this section.
# install.packages("googlesheets")
library(googlesheets)

# Address of the spreadsheet: the Google Sheets base URL followed by the
# sheet's id, joined without a separator.
sheets_url <- paste0(
  "https://docs.google.com/spreadsheets/d/",
  "1WBrH655fxqKW1QqvD5hnqvvEWIvRzDJcKEgjjFeYxeM"
)

# Turn the sheet URL into the object that gs_read() is given later.
gsurl1 <- gs_url(sheets_url)
Warning: `as_data_frame()` is deprecated as of tibble 2.0.0.
Please use `as_tibble()` instead.
The signature and semantics have changed, see `?as_tibble`.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
# Read the sheet's contents and record the date they were retrieved.
dat <- gs_read(gsurl1)
date_read <- lubridate::today()
# Show the first rows.
head(dat)
# A tibble: 6 x 12
    Git Github     R Rstudio `Reproducible R… `R markdown` `Data import`
  <dbl>  <dbl> <dbl>   <dbl>            <dbl>        <dbl>         <dbl>
1     9      9     9       9                9            9            10
2     1      1     5       5                7            5             4
3     0      0     2       3                3            0             3
4     0      1     1       1                1            1             1
5     0      2     7       7                7            5             8
6     2      2     3       2                0            0             1
# … with 5 more variables: `Web scraping` <dbl>, `Data cleaning` <dbl>,
#   dplyr <dbl>, Bioconductor <dbl>, `Regular expressions` <dbl>

Googlesheets4 - API v4 Google Sheets Package

What if I don’t want it public?

library(googlesheets4)
# Setting this option may be necessary on rstudio.cloud
options(httr_oob_default = TRUE)
# read_sheet() will ask you to log in
out <- read_sheet(sheets_url)

Can also save and load a token

# Load a previously saved token so no interactive login is needed.
token <- readr::read_rds("googledrive_token.rds")
library(googledrive)
drive_auth(token = token)
library(googlesheets4)
# googlesheets4 tracks its credentials separately from googledrive, so the
# token given to drive_auth() must be passed on explicitly; without this,
# read_sheet() does not use it.
gs4_auth(token = drive_token())
out <- read_sheet(sheets_url)
head(out)
# A tibble: 6 x 12
    Git Github     R Rstudio `Reproducible R… `R markdown` `Data import`
  <dbl>  <dbl> <dbl>   <dbl>            <dbl>        <dbl>         <dbl>
1     9      9     9       9                9            9            10
2     1      1     5       5                7            5             4
3     0      0     2       3                3            0             3
4     0      1     1       1                1            1             1
5     0      2     7       7                7            5             8
6     2      2     3       2                0            0             1
# … with 5 more variables: `Web scraping` <dbl>, `Data cleaning` <dbl>,
#   dplyr <dbl>, Bioconductor <dbl>, `Regular expressions` <dbl>

Google Sheets
https://SISBIB.github.io/Module1/labs/google-sheets-lab.Rmd

JSON: JavaScript Object Notation
Lists of stuff

Why JSON matters

# install.packages("jsonlite")
library(jsonlite)
# Fetch the JSON returned by this GitHub API URL and parse it.
jsonData <- fromJSON(txt = "https://api.github.com/users/jtleek/repos")
head(jsonData)
         id                          node_id               name
1 155565363 MDEwOlJlcG9zaXRvcnkxNTU1NjUzNjM=               2018
2 264786491 MDEwOlJlcG9zaXRvcnkyNjQ3ODY0OTE=            ads2020
3 101394164 MDEwOlJlcG9zaXRvcnkxMDEzOTQxNjQ=         advdatasci
4 111447948 MDEwOlJlcG9zaXRvcnkxMTE0NDc5NDg= advdatasci-project
5  47568815 MDEwOlJlcG9zaXRvcnk0NzU2ODgxNQ==   advdatasci-swirl
6  41645119 MDEwOlJlcG9zaXRvcnk0MTY0NTExOQ==       advdatasci15
                  full_name private owner.login owner.id        owner.node_id
1               jtleek/2018   FALSE      jtleek  1571674 MDQ6VXNlcjE1NzE2NzQ=
2            jtleek/ads2020   FALSE      jtleek  1571674 MDQ6VXNlcjE1NzE2NzQ=
3         jtleek/advdatasci   FALSE      jtleek  1571674 MDQ6VXNlcjE1NzE2NzQ=
4 jtleek/advdatasci-project   FALSE      jtleek  1571674 MDQ6VXNlcjE1NzE2NzQ=
5   jtleek/advdatasci-swirl   FALSE      jtleek  1571674 MDQ6VXNlcjE1NzE2NzQ=
6       jtleek/advdatasci15   FALSE      jtleek  1571674 MDQ6VXNlcjE1NzE2NzQ=
                                      owner.avatar_url owner.gravatar_id
1 https://avatars2.githubusercontent.com/u/1571674?v=4                  
2 https://avatars2.githubusercontent.com/u/1571674?v=4                  
3 https://avatars2.githubusercontent.com/u/1571674?v=4                  
4 https://avatars2.githubusercontent.com/u/1571674?v=4                  
5 https://avatars2.githubusercontent.com/u/1571674?v=4                  
6 https://avatars2.githubusercontent.com/u/1571674?v=4                  
                            owner.url            owner.html_url
1 https://api.github.com/users/jtleek https://github.com/jtleek
2 https://api.github.com/users/jtleek https://github.com/jtleek
3 https://api.github.com/users/jtleek https://github.com/jtleek
4 https://api.github.com/users/jtleek https://github.com/jtleek
5 https://api.github.com/users/jtleek https://github.com/jtleek
6 https://api.github.com/users/jtleek https://github.com/jtleek
                            owner.followers_url
1 https://api.github.com/users/jtleek/followers
2 https://api.github.com/users/jtleek/followers
3 https://api.github.com/users/jtleek/followers
4 https://api.github.com/users/jtleek/followers
5 https://api.github.com/users/jtleek/followers
6 https://api.github.com/users/jtleek/followers
                                         owner.following_url
1 https://api.github.com/users/jtleek/following{/other_user}
2 https://api.github.com/users/jtleek/following{/other_user}
3 https://api.github.com/users/jtleek/following{/other_user}
4 https://api.github.com/users/jtleek/following{/other_user}
5 https://api.github.com/users/jtleek/following{/other_user}
6 https://api.github.com/users/jtleek/following{/other_user}
                                      owner.gists_url
1 https://api.github.com/users/jtleek/gists{/gist_id}
2 https://api.github.com/users/jtleek/gists{/gist_id}
3 https://api.github.com/users/jtleek/gists{/gist_id}
4 https://api.github.com/users/jtleek/gists{/gist_id}
5 https://api.github.com/users/jtleek/gists{/gist_id}
6 https://api.github.com/users/jtleek/gists{/gist_id}
                                           owner.starred_url
1 https://api.github.com/users/jtleek/starred{/owner}{/repo}
2 https://api.github.com/users/jtleek/starred{/owner}{/repo}
3 https://api.github.com/users/jtleek/starred{/owner}{/repo}
4 https://api.github.com/users/jtleek/starred{/owner}{/repo}
5 https://api.github.com/users/jtleek/starred{/owner}{/repo}
6 https://api.github.com/users/jtleek/starred{/owner}{/repo}
                            owner.subscriptions_url
1 https://api.github.com/users/jtleek/subscriptions
2 https://api.github.com/users/jtleek/subscriptions
3 https://api.github.com/users/jtleek/subscriptions
4 https://api.github.com/users/jtleek/subscriptions
5 https://api.github.com/users/jtleek/subscriptions
6 https://api.github.com/users/jtleek/subscriptions
                   owner.organizations_url
1 https://api.github.com/users/jtleek/orgs
2 https://api.github.com/users/jtleek/orgs
3 https://api.github.com/users/jtleek/orgs
4 https://api.github.com/users/jtleek/orgs
5 https://api.github.com/users/jtleek/orgs
6 https://api.github.com/users/jtleek/orgs
                            owner.repos_url
1 https://api.github.com/users/jtleek/repos
2 https://api.github.com/users/jtleek/repos
3 https://api.github.com/users/jtleek/repos
4 https://api.github.com/users/jtleek/repos
5 https://api.github.com/users/jtleek/repos
6 https://api.github.com/users/jtleek/repos
                                      owner.events_url
1 https://api.github.com/users/jtleek/events{/privacy}
2 https://api.github.com/users/jtleek/events{/privacy}
3 https://api.github.com/users/jtleek/events{/privacy}
4 https://api.github.com/users/jtleek/events{/privacy}
5 https://api.github.com/users/jtleek/events{/privacy}
6 https://api.github.com/users/jtleek/events{/privacy}
                            owner.received_events_url owner.type
1 https://api.github.com/users/jtleek/received_events       User
2 https://api.github.com/users/jtleek/received_events       User
3 https://api.github.com/users/jtleek/received_events       User
4 https://api.github.com/users/jtleek/received_events       User
5 https://api.github.com/users/jtleek/received_events       User
6 https://api.github.com/users/jtleek/received_events       User
  owner.site_admin                                     html_url
1            FALSE               https://github.com/jtleek/2018
2            FALSE            https://github.com/jtleek/ads2020
3            FALSE         https://github.com/jtleek/advdatasci
4            FALSE https://github.com/jtleek/advdatasci-project
5            FALSE   https://github.com/jtleek/advdatasci-swirl
6            FALSE       https://github.com/jtleek/advdatasci15
                                                               description
1 Fall 2018 repository with course materials for JHU Advanced Data Science
2                                       Advanced Data Science 2020 Edition
3                                                                     <NA>
4                                                        Awesome project! 
5                                                                     <NA>
6                                     Advanced Data Science @ JHU Biostats
   fork                                                    url
1  TRUE               https://api.github.com/repos/jtleek/2018
2 FALSE            https://api.github.com/repos/jtleek/ads2020
3  TRUE         https://api.github.com/repos/jtleek/advdatasci
4 FALSE https://api.github.com/repos/jtleek/advdatasci-project
5 FALSE   https://api.github.com/repos/jtleek/advdatasci-swirl
6 FALSE       https://api.github.com/repos/jtleek/advdatasci15
                                                     forks_url
1               https://api.github.com/repos/jtleek/2018/forks
2            https://api.github.com/repos/jtleek/ads2020/forks
3         https://api.github.com/repos/jtleek/advdatasci/forks
4 https://api.github.com/repos/jtleek/advdatasci-project/forks
5   https://api.github.com/repos/jtleek/advdatasci-swirl/forks
6       https://api.github.com/repos/jtleek/advdatasci15/forks
                                                              keys_url
1               https://api.github.com/repos/jtleek/2018/keys{/key_id}
2            https://api.github.com/repos/jtleek/ads2020/keys{/key_id}
3         https://api.github.com/repos/jtleek/advdatasci/keys{/key_id}
4 https://api.github.com/repos/jtleek/advdatasci-project/keys{/key_id}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/keys{/key_id}
6       https://api.github.com/repos/jtleek/advdatasci15/keys{/key_id}
                                                                    collaborators_url
1               https://api.github.com/repos/jtleek/2018/collaborators{/collaborator}
2            https://api.github.com/repos/jtleek/ads2020/collaborators{/collaborator}
3         https://api.github.com/repos/jtleek/advdatasci/collaborators{/collaborator}
4 https://api.github.com/repos/jtleek/advdatasci-project/collaborators{/collaborator}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/collaborators{/collaborator}
6       https://api.github.com/repos/jtleek/advdatasci15/collaborators{/collaborator}
                                                     teams_url
1               https://api.github.com/repos/jtleek/2018/teams
2            https://api.github.com/repos/jtleek/ads2020/teams
3         https://api.github.com/repos/jtleek/advdatasci/teams
4 https://api.github.com/repos/jtleek/advdatasci-project/teams
5   https://api.github.com/repos/jtleek/advdatasci-swirl/teams
6       https://api.github.com/repos/jtleek/advdatasci15/teams
                                                     hooks_url
1               https://api.github.com/repos/jtleek/2018/hooks
2            https://api.github.com/repos/jtleek/ads2020/hooks
3         https://api.github.com/repos/jtleek/advdatasci/hooks
4 https://api.github.com/repos/jtleek/advdatasci-project/hooks
5   https://api.github.com/repos/jtleek/advdatasci-swirl/hooks
6       https://api.github.com/repos/jtleek/advdatasci15/hooks
                                                               issue_events_url
1               https://api.github.com/repos/jtleek/2018/issues/events{/number}
2            https://api.github.com/repos/jtleek/ads2020/issues/events{/number}
3         https://api.github.com/repos/jtleek/advdatasci/issues/events{/number}
4 https://api.github.com/repos/jtleek/advdatasci-project/issues/events{/number}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/issues/events{/number}
6       https://api.github.com/repos/jtleek/advdatasci15/issues/events{/number}
                                                     events_url
1               https://api.github.com/repos/jtleek/2018/events
2            https://api.github.com/repos/jtleek/ads2020/events
3         https://api.github.com/repos/jtleek/advdatasci/events
4 https://api.github.com/repos/jtleek/advdatasci-project/events
5   https://api.github.com/repos/jtleek/advdatasci-swirl/events
6       https://api.github.com/repos/jtleek/advdatasci15/events
                                                            assignees_url
1               https://api.github.com/repos/jtleek/2018/assignees{/user}
2            https://api.github.com/repos/jtleek/ads2020/assignees{/user}
3         https://api.github.com/repos/jtleek/advdatasci/assignees{/user}
4 https://api.github.com/repos/jtleek/advdatasci-project/assignees{/user}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/assignees{/user}
6       https://api.github.com/repos/jtleek/advdatasci15/assignees{/user}
                                                              branches_url
1               https://api.github.com/repos/jtleek/2018/branches{/branch}
2            https://api.github.com/repos/jtleek/ads2020/branches{/branch}
3         https://api.github.com/repos/jtleek/advdatasci/branches{/branch}
4 https://api.github.com/repos/jtleek/advdatasci-project/branches{/branch}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/branches{/branch}
6       https://api.github.com/repos/jtleek/advdatasci15/branches{/branch}
                                                     tags_url
1               https://api.github.com/repos/jtleek/2018/tags
2            https://api.github.com/repos/jtleek/ads2020/tags
3         https://api.github.com/repos/jtleek/advdatasci/tags
4 https://api.github.com/repos/jtleek/advdatasci-project/tags
5   https://api.github.com/repos/jtleek/advdatasci-swirl/tags
6       https://api.github.com/repos/jtleek/advdatasci15/tags
                                                               blobs_url
1               https://api.github.com/repos/jtleek/2018/git/blobs{/sha}
2            https://api.github.com/repos/jtleek/ads2020/git/blobs{/sha}
3         https://api.github.com/repos/jtleek/advdatasci/git/blobs{/sha}
4 https://api.github.com/repos/jtleek/advdatasci-project/git/blobs{/sha}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/git/blobs{/sha}
6       https://api.github.com/repos/jtleek/advdatasci15/git/blobs{/sha}
                                                           git_tags_url
1               https://api.github.com/repos/jtleek/2018/git/tags{/sha}
2            https://api.github.com/repos/jtleek/ads2020/git/tags{/sha}
3         https://api.github.com/repos/jtleek/advdatasci/git/tags{/sha}
4 https://api.github.com/repos/jtleek/advdatasci-project/git/tags{/sha}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/git/tags{/sha}
6       https://api.github.com/repos/jtleek/advdatasci15/git/tags{/sha}
                                                           git_refs_url
1               https://api.github.com/repos/jtleek/2018/git/refs{/sha}
2            https://api.github.com/repos/jtleek/ads2020/git/refs{/sha}
3         https://api.github.com/repos/jtleek/advdatasci/git/refs{/sha}
4 https://api.github.com/repos/jtleek/advdatasci-project/git/refs{/sha}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/git/refs{/sha}
6       https://api.github.com/repos/jtleek/advdatasci15/git/refs{/sha}
                                                               trees_url
1               https://api.github.com/repos/jtleek/2018/git/trees{/sha}
2            https://api.github.com/repos/jtleek/ads2020/git/trees{/sha}
3         https://api.github.com/repos/jtleek/advdatasci/git/trees{/sha}
4 https://api.github.com/repos/jtleek/advdatasci-project/git/trees{/sha}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/git/trees{/sha}
6       https://api.github.com/repos/jtleek/advdatasci15/git/trees{/sha}
                                                           statuses_url
1               https://api.github.com/repos/jtleek/2018/statuses/{sha}
2            https://api.github.com/repos/jtleek/ads2020/statuses/{sha}
3         https://api.github.com/repos/jtleek/advdatasci/statuses/{sha}
4 https://api.github.com/repos/jtleek/advdatasci-project/statuses/{sha}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/statuses/{sha}
6       https://api.github.com/repos/jtleek/advdatasci15/statuses/{sha}
                                                     languages_url
1               https://api.github.com/repos/jtleek/2018/languages
2            https://api.github.com/repos/jtleek/ads2020/languages
3         https://api.github.com/repos/jtleek/advdatasci/languages
4 https://api.github.com/repos/jtleek/advdatasci-project/languages
5   https://api.github.com/repos/jtleek/advdatasci-swirl/languages
6       https://api.github.com/repos/jtleek/advdatasci15/languages
                                                     stargazers_url
1               https://api.github.com/repos/jtleek/2018/stargazers
2            https://api.github.com/repos/jtleek/ads2020/stargazers
3         https://api.github.com/repos/jtleek/advdatasci/stargazers
4 https://api.github.com/repos/jtleek/advdatasci-project/stargazers
5   https://api.github.com/repos/jtleek/advdatasci-swirl/stargazers
6       https://api.github.com/repos/jtleek/advdatasci15/stargazers
                                                     contributors_url
1               https://api.github.com/repos/jtleek/2018/contributors
2            https://api.github.com/repos/jtleek/ads2020/contributors
3         https://api.github.com/repos/jtleek/advdatasci/contributors
4 https://api.github.com/repos/jtleek/advdatasci-project/contributors
5   https://api.github.com/repos/jtleek/advdatasci-swirl/contributors
6       https://api.github.com/repos/jtleek/advdatasci15/contributors
                                                     subscribers_url
1               https://api.github.com/repos/jtleek/2018/subscribers
2            https://api.github.com/repos/jtleek/ads2020/subscribers
3         https://api.github.com/repos/jtleek/advdatasci/subscribers
4 https://api.github.com/repos/jtleek/advdatasci-project/subscribers
5   https://api.github.com/repos/jtleek/advdatasci-swirl/subscribers
6       https://api.github.com/repos/jtleek/advdatasci15/subscribers
                                                     subscription_url
1               https://api.github.com/repos/jtleek/2018/subscription
2            https://api.github.com/repos/jtleek/ads2020/subscription
3         https://api.github.com/repos/jtleek/advdatasci/subscription
4 https://api.github.com/repos/jtleek/advdatasci-project/subscription
5   https://api.github.com/repos/jtleek/advdatasci-swirl/subscription
6       https://api.github.com/repos/jtleek/advdatasci15/subscription
                                                           commits_url
1               https://api.github.com/repos/jtleek/2018/commits{/sha}
2            https://api.github.com/repos/jtleek/ads2020/commits{/sha}
3         https://api.github.com/repos/jtleek/advdatasci/commits{/sha}
4 https://api.github.com/repos/jtleek/advdatasci-project/commits{/sha}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/commits{/sha}
6       https://api.github.com/repos/jtleek/advdatasci15/commits{/sha}
                                                           git_commits_url
1               https://api.github.com/repos/jtleek/2018/git/commits{/sha}
2            https://api.github.com/repos/jtleek/ads2020/git/commits{/sha}
3         https://api.github.com/repos/jtleek/advdatasci/git/commits{/sha}
4 https://api.github.com/repos/jtleek/advdatasci-project/git/commits{/sha}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/git/commits{/sha}
6       https://api.github.com/repos/jtleek/advdatasci15/git/commits{/sha}
                                                              comments_url
1               https://api.github.com/repos/jtleek/2018/comments{/number}
2            https://api.github.com/repos/jtleek/ads2020/comments{/number}
3         https://api.github.com/repos/jtleek/advdatasci/comments{/number}
4 https://api.github.com/repos/jtleek/advdatasci-project/comments{/number}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/comments{/number}
6       https://api.github.com/repos/jtleek/advdatasci15/comments{/number}
                                                                issue_comment_url
1               https://api.github.com/repos/jtleek/2018/issues/comments{/number}
2            https://api.github.com/repos/jtleek/ads2020/issues/comments{/number}
3         https://api.github.com/repos/jtleek/advdatasci/issues/comments{/number}
4 https://api.github.com/repos/jtleek/advdatasci-project/issues/comments{/number}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/issues/comments{/number}
6       https://api.github.com/repos/jtleek/advdatasci15/issues/comments{/number}
                                                             contents_url
1               https://api.github.com/repos/jtleek/2018/contents/{+path}
2            https://api.github.com/repos/jtleek/ads2020/contents/{+path}
3         https://api.github.com/repos/jtleek/advdatasci/contents/{+path}
4 https://api.github.com/repos/jtleek/advdatasci-project/contents/{+path}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/contents/{+path}
6       https://api.github.com/repos/jtleek/advdatasci15/contents/{+path}
                                                                     compare_url
1               https://api.github.com/repos/jtleek/2018/compare/{base}...{head}
2            https://api.github.com/repos/jtleek/ads2020/compare/{base}...{head}
3         https://api.github.com/repos/jtleek/advdatasci/compare/{base}...{head}
4 https://api.github.com/repos/jtleek/advdatasci-project/compare/{base}...{head}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/compare/{base}...{head}
6       https://api.github.com/repos/jtleek/advdatasci15/compare/{base}...{head}
                                                     merges_url
1               https://api.github.com/repos/jtleek/2018/merges
2            https://api.github.com/repos/jtleek/ads2020/merges
3         https://api.github.com/repos/jtleek/advdatasci/merges
4 https://api.github.com/repos/jtleek/advdatasci-project/merges
5   https://api.github.com/repos/jtleek/advdatasci-swirl/merges
6       https://api.github.com/repos/jtleek/advdatasci15/merges
                                                                    archive_url
1               https://api.github.com/repos/jtleek/2018/{archive_format}{/ref}
2            https://api.github.com/repos/jtleek/ads2020/{archive_format}{/ref}
3         https://api.github.com/repos/jtleek/advdatasci/{archive_format}{/ref}
4 https://api.github.com/repos/jtleek/advdatasci-project/{archive_format}{/ref}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/{archive_format}{/ref}
6       https://api.github.com/repos/jtleek/advdatasci15/{archive_format}{/ref}
                                                     downloads_url
1               https://api.github.com/repos/jtleek/2018/downloads
2            https://api.github.com/repos/jtleek/ads2020/downloads
3         https://api.github.com/repos/jtleek/advdatasci/downloads
4 https://api.github.com/repos/jtleek/advdatasci-project/downloads
5   https://api.github.com/repos/jtleek/advdatasci-swirl/downloads
6       https://api.github.com/repos/jtleek/advdatasci15/downloads
                                                              issues_url
1               https://api.github.com/repos/jtleek/2018/issues{/number}
2            https://api.github.com/repos/jtleek/ads2020/issues{/number}
3         https://api.github.com/repos/jtleek/advdatasci/issues{/number}
4 https://api.github.com/repos/jtleek/advdatasci-project/issues{/number}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/issues{/number}
6       https://api.github.com/repos/jtleek/advdatasci15/issues{/number}
                                                              pulls_url
1               https://api.github.com/repos/jtleek/2018/pulls{/number}
2            https://api.github.com/repos/jtleek/ads2020/pulls{/number}
3         https://api.github.com/repos/jtleek/advdatasci/pulls{/number}
4 https://api.github.com/repos/jtleek/advdatasci-project/pulls{/number}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/pulls{/number}
6       https://api.github.com/repos/jtleek/advdatasci15/pulls{/number}
                                                              milestones_url
1               https://api.github.com/repos/jtleek/2018/milestones{/number}
2            https://api.github.com/repos/jtleek/ads2020/milestones{/number}
3         https://api.github.com/repos/jtleek/advdatasci/milestones{/number}
4 https://api.github.com/repos/jtleek/advdatasci-project/milestones{/number}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/milestones{/number}
6       https://api.github.com/repos/jtleek/advdatasci15/milestones{/number}
                                                                               notifications_url
1               https://api.github.com/repos/jtleek/2018/notifications{?since,all,participating}
2            https://api.github.com/repos/jtleek/ads2020/notifications{?since,all,participating}
3         https://api.github.com/repos/jtleek/advdatasci/notifications{?since,all,participating}
4 https://api.github.com/repos/jtleek/advdatasci-project/notifications{?since,all,participating}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/notifications{?since,all,participating}
6       https://api.github.com/repos/jtleek/advdatasci15/notifications{?since,all,participating}
                                                            labels_url
1               https://api.github.com/repos/jtleek/2018/labels{/name}
2            https://api.github.com/repos/jtleek/ads2020/labels{/name}
3         https://api.github.com/repos/jtleek/advdatasci/labels{/name}
4 https://api.github.com/repos/jtleek/advdatasci-project/labels{/name}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/labels{/name}
6       https://api.github.com/repos/jtleek/advdatasci15/labels{/name}
                                                          releases_url
1               https://api.github.com/repos/jtleek/2018/releases{/id}
2            https://api.github.com/repos/jtleek/ads2020/releases{/id}
3         https://api.github.com/repos/jtleek/advdatasci/releases{/id}
4 https://api.github.com/repos/jtleek/advdatasci-project/releases{/id}
5   https://api.github.com/repos/jtleek/advdatasci-swirl/releases{/id}
6       https://api.github.com/repos/jtleek/advdatasci15/releases{/id}
                                                     deployments_url
1               https://api.github.com/repos/jtleek/2018/deployments
2            https://api.github.com/repos/jtleek/ads2020/deployments
3         https://api.github.com/repos/jtleek/advdatasci/deployments
4 https://api.github.com/repos/jtleek/advdatasci-project/deployments
5   https://api.github.com/repos/jtleek/advdatasci-swirl/deployments
6       https://api.github.com/repos/jtleek/advdatasci15/deployments
            created_at           updated_at            pushed_at
1 2018-10-31T13:50:37Z 2018-10-31T13:50:41Z 2018-10-30T18:13:41Z
2 2020-05-18T00:45:46Z 2020-06-27T15:14:17Z 2020-05-22T22:16:49Z
3 2017-08-25T10:38:16Z 2020-07-13T14:18:23Z 2020-05-17T23:06:04Z
4 2017-11-20T18:33:29Z 2017-11-20T18:35:50Z 2017-11-20T18:40:45Z
5 2015-12-07T17:43:53Z 2018-04-30T21:02:30Z 2015-12-09T20:21:13Z
6 2015-08-30T22:27:26Z 2019-12-06T08:56:59Z 2015-12-16T18:12:31Z
                                         git_url
1               git://github.com/jtleek/2018.git
2            git://github.com/jtleek/ads2020.git
3         git://github.com/jtleek/advdatasci.git
4 git://github.com/jtleek/advdatasci-project.git
5   git://github.com/jtleek/advdatasci-swirl.git
6       git://github.com/jtleek/advdatasci15.git
                                       ssh_url
1               git@github.com:jtleek/2018.git
2            git@github.com:jtleek/ads2020.git
3         git@github.com:jtleek/advdatasci.git
4 git@github.com:jtleek/advdatasci-project.git
5   git@github.com:jtleek/advdatasci-swirl.git
6       git@github.com:jtleek/advdatasci15.git
                                         clone_url
1               https://github.com/jtleek/2018.git
2            https://github.com/jtleek/ads2020.git
3         https://github.com/jtleek/advdatasci.git
4 https://github.com/jtleek/advdatasci-project.git
5   https://github.com/jtleek/advdatasci-swirl.git
6       https://github.com/jtleek/advdatasci15.git
                                       svn_url
1               https://github.com/jtleek/2018
2            https://github.com/jtleek/ads2020
3         https://github.com/jtleek/advdatasci
4 https://github.com/jtleek/advdatasci-project
5   https://github.com/jtleek/advdatasci-swirl
6       https://github.com/jtleek/advdatasci15
                                homepage   size stargazers_count watchers_count
1 https://jhu-advdatasci.github.io/2018/  60855                0              0
2                                   <NA>     12               32             32
3                                   <NA> 172184               17             17
4                                   <NA>    680                0              0
5                                   <NA>    188                4              4
6                                   <NA> 200992               14             14
  language has_issues has_projects has_downloads has_wiki has_pages forks_count
1     HTML      FALSE         TRUE          TRUE     TRUE     FALSE           2
2     <NA>       TRUE         TRUE          TRUE     TRUE     FALSE           3
3     HTML      FALSE         TRUE          TRUE     TRUE      TRUE           8
4     HTML       TRUE         TRUE          TRUE     TRUE     FALSE           0
5     HTML       TRUE         TRUE          TRUE     TRUE     FALSE           2
6     HTML       TRUE         TRUE          TRUE     TRUE      TRUE           7
  mirror_url archived disabled open_issues_count license.key license.name
1         NA    FALSE    FALSE                 0        <NA>         <NA>
2         NA    FALSE    FALSE                 0        <NA>         <NA>
3         NA    FALSE    FALSE                 0        <NA>         <NA>
4         NA    FALSE    FALSE                 0        <NA>         <NA>
5         NA    FALSE    FALSE                 1        <NA>         <NA>
6         NA    FALSE    FALSE                 0        <NA>         <NA>
  license.spdx_id license.url license.node_id forks open_issues watchers
1            <NA>        <NA>            <NA>     2           0        0
2            <NA>        <NA>            <NA>     3           0       32
3            <NA>        <NA>            <NA>     8           0       17
4            <NA>        <NA>            <NA>     0           0        0
5            <NA>        <NA>            <NA>     2           1        4
6            <NA>        <NA>            <NA>     7           0       14
  default_branch
1         master
2         master
3         master
4         master
5         master
6       gh-pages

Data frame structure from JSON

# Number of rows and columns in the parsed result
dim(jsonData)
[1] 30 73
# First few values of the name column
head(jsonData$name)
[1] "2018"               "ads2020"            "advdatasci"        
[4] "advdatasci-project" "advdatasci-swirl"   "advdatasci15"      
class(jsonData$owner) # Some of the columns are themselves data frames!
[1] "data.frame"
# Size and column names of the nested owner data frame
dim(jsonData$owner); names(jsonData$owner)
[1] 30 18
 [1] "login"               "id"                  "node_id"            
 [4] "avatar_url"          "gravatar_id"         "url"                
 [7] "html_url"            "followers_url"       "following_url"      
[10] "gists_url"           "starred_url"         "subscriptions_url"  
[13] "organizations_url"   "repos_url"           "events_url"         
[16] "received_events_url" "type"                "site_admin"         

JSON Lab
https://SISBIB.github.io/Module1/labs/json-lab.Rmd

Web Scraping

This is data

View the source

What the computer sees

Ways to see the source

Chrome:

  1. right click on page
  2. select “view source”

Firefox:

  1. right click on page
  2. select “view source”

Microsoft Edge:

  1. right click on page
  2. select “view source”

Safari:

  1. click on “Safari”
  2. select “Preferences”
  3. go to “Advanced”
  4. check “Show Develop menu in menu bar”
  5. click on “Develop”
  6. select “show page source”
  7. alternatively, instead of steps 5 and 6, right click on page and select “view source”

https://github.com/simonmunzert/rscraping-jsm-2016/blob/c04fd91fec711df65c838e07723125155a7f2cda/02-scraping-with-rvest.r

Inspect element

Copy XPath

Use SelectorGadget

rvest package

# Address of the page that holds the table we want to scrape
recount_url = "http://bowtie-bio.sourceforge.net/recount/"
# install.packages("rvest")
library(rvest)

# Download and parse the page
htmlfile = read_html(x = recount_url)

# Pick out the <table> directly under the element whose id is "recounttab"
# (the XPath can be copied from the browser's "Inspect element" panel)
nds = html_nodes(x = htmlfile, xpath = '//*[@id="recounttab"]/table')

# Convert the selected node(s) to tables and collapse into one data frame
dat = as.data.frame(html_table(nds))
head(dat)
       X1                                         X2      X3
1   Study                                       PMID Species
2 bodymap not published, but publicly available here   human
3  cheung                                   20856902   human
4    core                                   19056941   human
5   gilad                                   20009012   human
6    maqc                                   20167110   human
                                X4                               X5
1  Number of biological replicates Number of uniquely aligned reads
2                               19                    2,197,622,796
3                               41                      834,584,950
4                                2                        8,670,342
5                                6                       41,356,738
6 14 (technical)**  2 (biological)                       71,970,164
               X6              X7              X8
1   ExpressionSet     Count table Phenotype table
2            link            link            link
3            link            link            link
4            link            link            link
5            link            link            link
6 original pooled original pooled original pooled
                                               X9
1                                           Notes
2 Illumina Human BodyMap 2.0 -- tissue comparison
3                                    HapMap - CEU
4                                lung fibroblasts
5                        liver; males and femlaes
6                              experiment: MAQC-2

Little cleanup

# Promote the first row (the scraped header) to column names.
# Each cell is converted on its own so that a factor column contributes its
# label; as.character() applied to the whole one-row data frame would return
# the factor's integer code instead.
colnames(dat) = vapply(dat[1, ], as.character, character(1),
                       USE.NAMES = FALSE)
# Drop the header row now that it lives in the column names
# (row names are left untouched, so they start at 2).
dat = dat[-1, ]
head(dat)
       Study                                       PMID Species
2    bodymap not published, but publicly available here   human
3     cheung                                   20856902   human
4       core                                   19056941   human
5      gilad                                   20009012   human
6       maqc                                   20167110   human
7 montgomery                                   20220756   human
   Number of biological replicates Number of uniquely aligned reads
2                               19                    2,197,622,796
3                               41                      834,584,950
4                                2                        8,670,342
5                                6                       41,356,738
6 14 (technical)**  2 (biological)                       71,970,164
7                               60                     *886,468,054
    ExpressionSet     Count table Phenotype table
2            link            link            link
3            link            link            link
4            link            link            link
5            link            link            link
6 original pooled original pooled original pooled
7            link            link            link
                                            Notes
2 Illumina Human BodyMap 2.0 -- tissue comparison
3                                    HapMap - CEU
4                                lung fibroblasts
5                        liver; males and femlaes
6                              experiment: MAQC-2
7                                    HapMap - CEU

APIs

Application Programming Interfaces

In biology too!

Step 0: Did someone do this already?

Do it yourself: read the Docs

Read the docs

Read the docs

Read the docs

A dissected example

The base URL

The Path: Search repositories

Create a query - pass the q parameter

Date repo was created

Language repo is in

Ignore repos from “cran”

#install.packages("httr")
library(httr)

# Assemble the request URL: base URL + path, then the q parameter whose
# search terms are joined with "+"
query_url = paste0(
  "https://api.github.com/search/repositories?q=",
  paste("created:2014-08-13",  # date repo was created
        "language:r",          # language repo is in
        "-user:cran",          # ignore repos from "cran"
        sep = "+")
)

# Send the request and list the top-level fields of the parsed response
req = GET(url = query_url)
names(content(req))
[1] "total_count"        "incomplete_results" "items"             

Not all APIs are “open”

(see also twitteR package)

# Register the application; the key/secret strings are placeholders to be
# replaced with the credentials issued for your own app.
myapp = oauth_app("twitter",
                  key = "yourConsumerKeyHere",
                  secret = "yourConsumerSecretHere")
# Build the OAuth 1.0 signature from the app plus the user's token pair
# (also placeholders).
sig = sign_oauth1.0(myapp,
                    token = "yourTokenHere",
                    token_secret = "yourTokenSecretHere")
# Signed request for the home timeline
homeTL = GET("https://api.twitter.com/1.1/statuses/home_timeline.json", sig)

# But you can get cool data
json1 = content(homeTL)
# Round-trip the parsed content through JSON text so the result can be
# indexed like a table below. toJSON is namespace-qualified, matching
# fromJSON, so the call does not depend on jsonlite being attached or on
# which package's toJSON happens to be first on the search path.
json2 = jsonlite::fromJSON(jsonlite::toJSON(json1))
json2[1, 1:4]
                    created_at           id             id_str
1 Mon Jan 13 05:18:04 +0000 2014 4.225984e+17 422598398940684288
                                                                                                                                         text
1 Now that P. Norvig's regex golf IPython notebook hit Slashdot, let's see if our traffic spike tops the previous one: http://t.co/Vc6JhZXOo8